import json
from PyCmpltrtok.common import get_dir_name_ext, sep
import os


def jsonl2limit(jsonl_path, limit=0):
    """Write the first ``limit`` lines of a JSONL file to a sibling file.

    The output path is built from the directory and base name that
    ``get_dir_name_ext`` reports for ``jsonl_path``, with the suffix
    ``.limit-<limit>.jsonl`` appended to the base name. Every copied line is
    written with a trailing newline, including a final input line that had
    none. A progress counter is printed every 1000 lines, and the number of
    lines actually written is printed at the end.

    Args:
        jsonl_path: Path of the input file; it is read as UTF-8 text.
        limit: Maximum number of lines to copy. Must be a positive int; the
            default of 0 is rejected, so callers have to pass a real value.

    Raises:
        TypeError: If ``limit`` is not an int.
        ValueError: If ``limit`` is not greater than zero.
    """
    # Explicit raises instead of ``assert``: assertions are removed under
    # ``python -O``, which would let an invalid limit through silently.
    if not isinstance(limit, int):
        raise TypeError(f'limit must be an int, got {type(limit).__name__}')
    if limit <= 0:
        raise ValueError(f'limit must be a positive integer, got {limit!r}')

    suffix = f'.limit-{limit}.jsonl'

    xdir, xname, _ = get_dir_name_ext(jsonl_path)
    json_path = os.path.join(xdir, xname + suffix)
    sep('Started: ' + xname)

    written = 0  # number of lines copied so far
    with open(jsonl_path, 'r', encoding='utf8') as fin, \
            open(json_path, 'w', encoding='utf8') as fout:
        for xline in fin:
            if written >= limit:
                break
            if not written % 1000:
                print(written)
            # Both files use the default universal-newline text mode, so every
            # line terminator arrives as '\n'; only a final unterminated line
            # needs one appended.
            fout.write(xline if xline.endswith('\n') else xline + '\n')
            written += 1

    # Report the real number of lines written (not the index of the last line
    # read), so short and empty inputs are counted correctly.
    print(written)
    sep('Over: ' + xname)


if __name__ == '__main__':
    # Script entry point: truncate each listed dataset file to its first 20
    # lines. Alternative input locations, kept commented out for quick
    # switching:
    #   r'D:\_dell7590_root\local\LNP_datasets\med\CBLUE\CMeIE-V2\CMeIE-V2_train.jsonl'
    #   r'D:\_dell7590_root\local\LNP_datasets\med\CBLUE\CMeIE-V2\CMeIE-V2_dev.jsonl'
    #   '/mnt/d/_dell7590_root/local/LNP_datasets/med/CBLUE/CMeIE-V2/CMeIE-V2_train.jsonl'
    #   '/mnt/d/_dell7590_root/local/LNP_datasets/med/CBLUE/CMeIE-V2/CMeIE-V2_dev.jsonl'
    input_files = (
        r'D:\_dell7590_root\local\LNP_datasets\med\CBLUE\CMeIE-V2-002\CMeIE-V2_train.jsonl',
    )
    for input_file in input_files:
        # Echo the path first so the log shows which file is being processed.
        print(input_file)
        jsonl2limit(input_file, limit=20)
    sep('All over')